# Load the input tables one after another. Each read assigns to `df`, so when
# this section is run top to bottom only "merged-added-functions.csv" is still
# held in `df` afterwards; the two earlier tables are only echoed.
df <- read.csv(file = "merged-new-version2.csv", header = TRUE, sep = ",")
# Disabled filter. NOTE(review): `!complete.cases(df)` would keep only the
# rows that contain a missing value -- confirm the intent before re-enabling.
# df <- df[!complete.cases(df), ]
df
df <- read.csv(file = "merged-variety.csv", header = TRUE, sep = ",")
# df <- df[!complete.cases(df), ]
df
df <- read.csv(file = "merged-added-functions.csv", header = TRUE, sep = ",")
# df <- df[!complete.cases(df), ]
df
# Derived columns used by the models in this notebook: log(1 + x) versions of
# the raw measures, plus `group` converted to a factor.
# log1p(x) computes log(1 + x) directly and stays accurate when x is close to
# zero, where forming x + 1 first loses precision.
df$ln_novelty <- log1p(df$novelty)
df$ln_total <- log1p(df$total)
df$ln_exploration <- log1p(df$exploration)
df$group <- factor(df$group)
df$ln_len_unique <- log1p(df$len_unique)
df$ln_added_sum <- log1p(df$added_sum)
df
# Spearman rank correlations between every numeric column of `df`, using only
# the rows that are complete across all of those columns ("complete.obs").
# vapply() guarantees a logical mask (sapply() may return a list for a
# zero-column input), and drop = FALSE keeps a data frame even when a single
# column is numeric, so cor() always receives matrix-like input.
# Note: log(1 + x) is monotone, so each ln_* column has the same Spearman
# coefficients as the raw column it was derived from.
df_new <- df[, vapply(df, is.numeric, logical(1)), drop = FALSE]
cor(df_new, use = "complete.obs", method = "spearman")
                                 X   Unnamed..0       phase     novelty abs_perform_diff_best      Q7_Q7_1     Q7_Q7_2     Q8_Q8_1         Q10
X                      1.000000000  1.000000000  0.24234098 -0.04741331          -0.038818179 -0.007028783 -0.05468920 -0.04967287  0.07080519
Unnamed..0             1.000000000  1.000000000  0.24234098 -0.04741331          -0.038818179 -0.007028783 -0.05468920 -0.04967287  0.07080519
phase                  0.242340977  0.242340977  1.00000000  0.11614783          -0.087823892  0.000000000  0.00000000  0.00000000  0.00000000
novelty               -0.047413312 -0.047413312  0.11614783  1.00000000          -0.269279723  0.080018022  0.18380978  0.15335427  0.08843367
abs_perform_diff_best -0.038818179 -0.038818179 -0.08782389 -0.26927972           1.000000000  0.051617014 -0.15978122 -0.12990581 -0.23656776
Q7_Q7_1               -0.007028783 -0.007028783  0.00000000  0.08001802           0.051617014  1.000000000  0.59849196  0.23554938  0.18107041
Q7_Q7_2               -0.054689204 -0.054689204  0.00000000  0.18380978          -0.159781215  0.598491958  1.00000000  0.30805235  0.25765206
Q8_Q8_1               -0.049672874 -0.049672874  0.00000000  0.15335427          -0.129905810  0.235549382  0.30805235  1.00000000  0.30652732
Q10                    0.070805188  0.070805188  0.00000000  0.08843367          -0.236567760  0.181070414  0.25765206  0.30652732  1.00000000
count                 -0.048992588 -0.048992588 -0.11856704  0.31277705          -0.390547953 -0.021088625  0.03851054  0.04791366  0.12437608
total                 -0.085035463 -0.085035463  0.21237656  0.35093689          -0.731639428 -0.071894159  0.14138398  0.14739393  0.22020677
user.requirement      -0.090544990 -0.090544990  0.17631252  0.25602042          -0.600678779 -0.100219792  0.07490003  0.12716391  0.17775575
infovis               -0.065173870 -0.065173870  0.20842670  0.24858512          -0.626350528 -0.027634236  0.15188458  0.12070087  0.18071434
novelty_score          0.021684030  0.021684030  0.16844298  0.25940071          -0.612495128 -0.092662465  0.11024003  0.12790068  0.17506615
exploration           -0.137918112 -0.137918112 -0.23051101  0.35969601          -0.113695104 -0.003202928  0.03283455 -0.02093143  0.02079226
Group                 -0.968135174 -0.968135174  0.00000000  0.13955343           0.002413445  0.012148461  0.06764756  0.06088616 -0.06719948
len_unique            -0.012268317 -0.012268317  0.19026679  0.61941904          -0.476219151  0.125547394  0.21213738  0.25660123  0.24104473
added_sum             -0.096339112 -0.096339112 -0.12989002  0.42763970          -0.237174055  0.041498199  0.07157854  0.08453075  0.11804975
ln_novelty            -0.047413312 -0.047413312  0.11614783  1.00000000          -0.269279723  0.080018022  0.18380978  0.15335427  0.08843367
ln_total              -0.085035463 -0.085035463  0.21237656  0.35093689          -0.731639428 -0.071894159  0.14138398  0.14739393  0.22020677
ln_exploration        -0.137918112 -0.137918112 -0.23051101  0.35969601          -0.113695104 -0.003202928  0.03283455 -0.02093143  0.02079226
ln_len_unique         -0.012268317 -0.012268317  0.19026679  0.61941904          -0.476219151  0.125547394  0.21213738  0.25660123  0.24104473
ln_added_sum          -0.096339112 -0.096339112 -0.12989002  0.42763970          -0.237174055  0.041498199  0.07157854  0.08453075  0.11804975
                            count       total user.requirement     infovis novelty_score  exploration        Group  len_unique   added_sum
X                     -0.04899259 -0.08503546      -0.09054499 -0.06517387    0.02168403 -0.137918112 -0.968135174 -0.01226832 -0.09633911
Unnamed..0            -0.04899259 -0.08503546      -0.09054499 -0.06517387    0.02168403 -0.137918112 -0.968135174 -0.01226832 -0.09633911
phase                 -0.11856704  0.21237656       0.17631252  0.20842670    0.16844298 -0.230511006  0.000000000  0.19026679 -0.12989002
novelty                0.31277705  0.35093689       0.25602042  0.24858512    0.25940071  0.359696008  0.139553432  0.61941904  0.42763970
abs_perform_diff_best -0.39054795 -0.73163943      -0.60067878 -0.62635053   -0.61249513 -0.113695104  0.002413445 -0.47621915 -0.23717405
Q7_Q7_1               -0.02108862 -0.07189416      -0.10021979 -0.02763424   -0.09266247 -0.003202928  0.012148461  0.12554739  0.04149820
Q7_Q7_2                0.03851054  0.14138398       0.07490003  0.15188458    0.11024003  0.032834551  0.067647564  0.21213738  0.07157854
Q8_Q8_1                0.04791366  0.14739393       0.12716391  0.12070087    0.12790068 -0.020931428  0.060886156  0.25660123  0.08453075
Q10                    0.12437608  0.22020677       0.17775575  0.18071434    0.17506615  0.020792263 -0.067199483  0.24104473  0.11804975
count                  1.00000000  0.46015412       0.33312380  0.37702248    0.38263169  0.553706760  0.041699147  0.38531970  0.58243780
total                  0.46015412  1.00000000       0.83475255  0.83755499    0.84153034  0.286473499  0.160285056  0.57545048  0.40088417
user.requirement       0.33312380  0.83475255       1.00000000  0.79279588    0.55250554  0.192288215  0.151617873  0.38163565  0.27283404
infovis                0.37702248  0.83755499       0.79279588  1.00000000    0.57296640  0.196935460  0.132983326  0.43612011  0.27590912
novelty_score          0.38263169  0.84153034       0.55250554  0.57296640    1.00000000  0.237021121  0.034241557  0.50959864  0.36169269
exploration            0.55370676  0.28647350       0.19228822  0.19693546    0.23702112  1.000000000  0.109072879  0.33885513  0.89826000
Group                  0.04169915  0.16028506       0.15161787  0.13298333    0.03424156  0.109072879  1.000000000  0.09829201  0.09539310
len_unique             0.38531970  0.57545048       0.38163565  0.43612011    0.50959864  0.338855129  0.098292010  1.00000000  0.54850475
added_sum              0.58243780  0.40088417       0.27283404  0.27590912    0.36169269  0.898259998  0.095393103  0.54850475  1.00000000
ln_novelty             0.31277705  0.35093689       0.25602042  0.24858512    0.25940071  0.359696008  0.139553432  0.61941904  0.42763970
ln_total               0.46015412  1.00000000       0.83475255  0.83755499    0.84153034  0.286473499  0.160285056  0.57545048  0.40088417
ln_exploration         0.55370676  0.28647350       0.19228822  0.19693546    0.23702112  1.000000000  0.109072879  0.33885513  0.89826000
ln_len_unique          0.38531970  0.57545048       0.38163565  0.43612011    0.50959864  0.338855129  0.098292010  1.00000000  0.54850475
ln_added_sum           0.58243780  0.40088417       0.27283404  0.27590912    0.36169269  0.898259998  0.095393103  0.54850475  1.00000000
                       ln_novelty    ln_total ln_exploration ln_len_unique ln_added_sum
X                     -0.04741331 -0.08503546   -0.137918112   -0.01226832  -0.09633911
Unnamed..0            -0.04741331 -0.08503546   -0.137918112   -0.01226832  -0.09633911
phase                  0.11614783  0.21237656   -0.230511006    0.19026679  -0.12989002
novelty                1.00000000  0.35093689    0.359696008    0.61941904   0.42763970
abs_perform_diff_best -0.26927972 -0.73163943   -0.113695104   -0.47621915  -0.23717405
Q7_Q7_1                0.08001802 -0.07189416   -0.003202928    0.12554739   0.04149820
Q7_Q7_2                0.18380978  0.14138398    0.032834551    0.21213738   0.07157854
Q8_Q8_1                0.15335427  0.14739393   -0.020931428    0.25660123   0.08453075
Q10                    0.08843367  0.22020677    0.020792263    0.24104473   0.11804975
count                  0.31277705  0.46015412    0.553706760    0.38531970   0.58243780
total                  0.35093689  1.00000000    0.286473499    0.57545048   0.40088417
user.requirement       0.25602042  0.83475255    0.192288215    0.38163565   0.27283404
infovis                0.24858512  0.83755499    0.196935460    0.43612011   0.27590912
novelty_score          0.25940071  0.84153034    0.237021121    0.50959864   0.36169269
exploration            0.35969601  0.28647350    1.000000000    0.33885513   0.89826000
Group                  0.13955343  0.16028506    0.109072879    0.09829201   0.09539310
len_unique             0.61941904  0.57545048    0.338855129    1.00000000   0.54850475
added_sum              0.42763970  0.40088417    0.898259998    0.54850475   1.00000000
ln_novelty             1.00000000  0.35093689    0.359696008    0.61941904   0.42763970
ln_total               0.35093689  1.00000000    0.286473499    0.57545048   0.40088417
ln_exploration         0.35969601  0.28647350    1.000000000    0.33885513   0.89826000
ln_len_unique          0.61941904  0.57545048    0.338855129    1.00000000   0.54850475
ln_added_sum           0.42763970  0.40088417    0.898259998    0.54850475   1.00000000
library(car)
Loading required package: carData
# Collinearity check: variance inflation factors for the two predictors of
# ln_total (ln_novelty and ln_len_unique).
mod <- lm(ln_total ~ ln_novelty + ln_len_unique, data = df)
car::vif(mod)
   ln_novelty ln_len_unique 
     1.780538      1.780538 
# Make group "3" the reference level, then regress ln_added_sum on group
# alone; each group coefficient is a contrast against group "3".
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(ln_added_sum ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_added_sum ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8830 -1.8139 -0.4864  1.3270  6.6483 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     1.86244    0.15778  11.804   <2e-16 ***
factor(group)0 -0.55034    0.22166  -2.483   0.0133 *  
factor(group)1 -0.04857    0.21891  -0.222   0.8245    
factor(group)2  0.02058    0.21763   0.095   0.9247    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.919 on 620 degrees of freedom
  (12 observations deleted due to missingness)
Multiple R-squared:  0.01469,   Adjusted R-squared:  0.009923 
F-statistic: 3.081 on 3 and 620 DF,  p-value: 0.02695
# Group-only model for ln_exploration, with group "3" as the reference level.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(ln_exploration ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_exploration ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.2275 -0.1862 -0.1563  0.1866  0.5328 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.22749    0.01941  11.719   <2e-16 ***
factor(group)0 -0.06718    0.02710  -2.479   0.0134 *  
factor(group)1 -0.04128    0.02678  -1.542   0.1236    
factor(group)2 -0.03052    0.02662  -1.146   0.2521    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2362 on 632 degrees of freedom
Multiple R-squared:  0.009905,  Adjusted R-squared:  0.005206 
F-statistic: 2.108 on 3 and 632 DF,  p-value: 0.09805
# Group-only model for ln_len_unique, with group "3" as the reference level.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(ln_len_unique ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_len_unique ~ factor(group), data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-4.000 -1.004  0.126  1.144  5.135 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      3.7016     0.1562  23.705  < 2e-16 ***
factor(group)0  -0.8832     0.2194  -4.026 6.38e-05 ***
factor(group)1   0.1472     0.2167   0.679    0.497    
factor(group)2   0.2984     0.2154   1.385    0.166    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.9 on 620 degrees of freedom
  (12 observations deleted due to missingness)
Multiple R-squared:  0.05512,   Adjusted R-squared:  0.05055 
F-statistic: 12.06 on 3 and 620 DF,  p-value: 1.115e-07
# Five-number summary plus mean of ln_len_unique within each group.
with(df, tapply(ln_len_unique, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   2.773   3.726   3.702   4.511   8.514 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   3.497   2.818   4.143   7.953       4 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   2.303   4.086   3.849   5.017   8.415       4 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   3.091   3.892   4.000   4.878   8.489       4 
# Group-only model for ln_total, with group "3" as the reference level.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(ln_total ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_total ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7373 -0.2143  0.3493  0.8471  1.7667 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      5.1441     0.1181  43.541  < 2e-16 ***
factor(group)0  -1.0417     0.1649  -6.316 5.05e-10 ***
factor(group)1  -0.4069     0.1630  -2.497 0.012787 *  
factor(group)2  -0.5990     0.1620  -3.697 0.000237 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.437 on 632 degrees of freedom
Multiple R-squared:  0.06155,   Adjusted R-squared:  0.0571 
F-statistic: 13.82 on 3 and 632 DF,  p-value: 9.76e-09
# Group-only model for ln_novelty, with group "3" as the reference level.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(ln_novelty ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_novelty ~ factor(group), data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.52892 -0.14068  0.06865  0.15783  0.28954 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.52892    0.01773  29.837  < 2e-16 ***
factor(group)0 -0.13269    0.02475  -5.362 1.16e-07 ***
factor(group)1 -0.12367    0.02445  -5.058 5.56e-07 ***
factor(group)2 -0.05178    0.02431  -2.130   0.0336 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2157 on 632 degrees of freedom
Multiple R-squared:  0.05844,   Adjusted R-squared:  0.05397 
F-statistic: 13.08 on 3 and 632 DF,  p-value: 2.706e-08
# Full model for ln_exploration: group (reference level "3") plus the four
# questionnaire items and count as covariates.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod2 <- lm(
  ln_exploration ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod2)

Call:
lm(formula = ln_exploration ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6625 -0.1580 -0.1158  0.1618  0.5694 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.233602   0.038414   6.081 2.10e-09 ***
factor(group)0 -0.055198   0.026422  -2.089   0.0371 *  
factor(group)1 -0.036783   0.026092  -1.410   0.1591    
factor(group)2 -0.022188   0.025726  -0.862   0.3888    
Q7_Q7_1        -0.003198   0.007597  -0.421   0.6740    
Q7_Q7_2         0.005396   0.007728   0.698   0.4853    
Q8_Q8_1        -0.013705   0.007994  -1.714   0.0870 .  
Q10            -0.003711   0.011739  -0.316   0.7520    
count           0.025482   0.003090   8.248 9.92e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2257 on 611 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1106,    Adjusted R-squared:  0.09895 
F-statistic: 9.497 on 8 and 611 DF,  p-value: 1.997e-12
# Reduced model for ln_exploration: the same covariates as mod2 but without
# group. The relevel has no effect on this fit; it only keeps the reference
# level of df$group at "3".
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod3 <- lm(
  ln_exploration ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod3)

Call:
lm(formula = ln_exploration ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6609 -0.1563 -0.1278  0.1708  0.5594 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.201974   0.033922   5.954 4.40e-09 ***
Q7_Q7_1     -0.003721   0.007550  -0.493    0.622    
Q7_Q7_2      0.006447   0.007670   0.841    0.401    
Q8_Q8_1     -0.012414   0.007974  -1.557    0.120    
Q10         -0.006051   0.011524  -0.525    0.600    
count        0.025721   0.003089   8.326 5.47e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.226 on 614 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1038,    Adjusted R-squared:  0.09647 
F-statistic: 14.22 on 5 and 614 DF,  p-value: 3.509e-13
# F-test of the nested pair: does adding group (mod2) improve on the
# covariates-only model (mod3) for ln_exploration?
stats::anova(mod2, mod3)
Analysis of Variance Table

Model 1: ln_exploration ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + 
    Q10 + count
Model 2: ln_exploration ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1    611 31.133                           
2    614 31.372 -3  -0.23919 1.5647 0.1968
# Full model for ln_novelty: group (reference level "3") plus the four
# questionnaire items and count. The same formula is fitted again elsewhere
# in this notebook and stored as mod1.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod <- lm(
  ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod)

Call:
lm(formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73108 -0.10789  0.05269  0.14730  0.30517 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.412100   0.035171  11.717  < 2e-16 ***
factor(group)0 -0.113961   0.024192  -4.711 3.06e-06 ***
factor(group)1 -0.116408   0.023889  -4.873 1.40e-06 ***
factor(group)2 -0.051286   0.023555  -2.177  0.02984 *  
Q7_Q7_1        -0.020611   0.006956  -2.963  0.00316 ** 
Q7_Q7_2         0.028904   0.007075   4.085 4.99e-05 ***
Q8_Q8_1         0.008860   0.007319   1.210  0.22656    
Q10             0.007122   0.010748   0.663  0.50783    
count           0.013293   0.002829   4.699 3.23e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2067 on 611 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1234,    Adjusted R-squared:  0.112 
F-statistic: 10.75 on 8 and 611 DF,  p-value: 3.249e-14
# Full model for ln_novelty kept under its own name (mod1) so that it can be
# compared against the covariates-only model.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod1 <- lm(
  ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod1)

Call:
lm(formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73108 -0.10789  0.05269  0.14730  0.30517 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.412100   0.035171  11.717  < 2e-16 ***
factor(group)0 -0.113961   0.024192  -4.711 3.06e-06 ***
factor(group)1 -0.116408   0.023889  -4.873 1.40e-06 ***
factor(group)2 -0.051286   0.023555  -2.177  0.02984 *  
Q7_Q7_1        -0.020611   0.006956  -2.963  0.00316 ** 
Q7_Q7_2         0.028904   0.007075   4.085 4.99e-05 ***
Q8_Q8_1         0.008860   0.007319   1.210  0.22656    
Q10             0.007122   0.010748   0.663  0.50783    
count           0.013293   0.002829   4.699 3.23e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2067 on 611 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1234,    Adjusted R-squared:  0.112 
F-statistic: 10.75 on 8 and 611 DF,  p-value: 3.249e-14
# Reduced model for ln_novelty: covariates only, no group term. The relevel
# has no effect on this fit; it only keeps the reference level of df$group
# at "3".
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod4 <- lm(
  ln_novelty ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod4)

Call:
lm(formula = ln_novelty ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.7883 -0.0854  0.0699  0.1531  0.3014 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.343113   0.031746  10.808  < 2e-16 ***
Q7_Q7_1     -0.023135   0.007066  -3.274  0.00112 ** 
Q7_Q7_2      0.032111   0.007178   4.474 9.17e-06 ***
Q8_Q8_1      0.011171   0.007462   1.497  0.13490    
Q10         -0.001228   0.010785  -0.114  0.90939    
count        0.013646   0.002891   4.720 2.93e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2115 on 614 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.07716,   Adjusted R-squared:  0.06964 
F-statistic: 10.27 on 5 and 614 DF,  p-value: 1.82e-09
# F-test of the nested pair: does adding group (mod1) improve on the
# covariates-only model (mod4) for ln_novelty?
stats::anova(mod1, mod4)
Analysis of Variance Table

Model 1: ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count
Model 2: ln_novelty ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1    611 26.099                                  
2    614 27.477 -3   -1.3777 10.751 6.815e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(lmerTest)
# Mixed model for ln_novelty: fixed effect of group, random intercept per
# phase, fitted by maximum likelihood (REML = FALSE).
fit.lmer <- lmer(
  ln_novelty ~ factor(group) + (1 | phase),
  data = df,
  REML = FALSE
)
fit.lmer
Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
Formula: ln_novelty ~ factor(group) + (1 | phase)
   Data: df
      AIC       BIC    logLik  deviance  df.resid 
-138.4479 -111.7167   75.2239 -150.4479       630 
Random effects:
 Groups   Name        Std.Dev.
 phase    (Intercept) 0.005242
 Residual             0.214918
Number of obs: 636, groups:  phase, 4
Fixed Effects:
   (Intercept)  factor(group)0  factor(group)1  factor(group)2  
       0.52892        -0.13269        -0.12367        -0.05178  
# Five-number summary plus mean of ln_novelty within each group.
with(df, tapply(ln_novelty, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.4842  0.5588  0.5289  0.6162  0.6894 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.5206  0.3962  0.6073  0.6858 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1777  0.5062  0.4053  0.6182  0.6931 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.3871  0.5465  0.4771  0.6084  0.6904 
# Five-number summary plus mean of ln_total within each group.
with(df, tapply(ln_total, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  4.331   4.761   5.079   5.144   5.515   5.891 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   3.991   4.830   4.102   5.337   5.869 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.553   5.089   4.737   5.580   5.882 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.615   4.925   4.545   5.450   5.884 
# Five-number summary plus mean of ln_exploration within each group.
with(df, tapply(ln_exploration, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.0938  0.2275  0.4391  0.6931 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.0000  0.1603  0.3010  0.6931 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.02175 0.18621 0.38244 0.69315 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.09391 0.19697 0.35899 0.69315 
# Five-number summary plus mean of ln_len_unique within each group
# (the same call appears earlier in this notebook).
with(df, tapply(ln_len_unique, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   2.773   3.726   3.702   4.511   8.514 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   3.497   2.818   4.143   7.953       4 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   2.303   4.086   3.849   5.017   8.415       4 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   3.091   3.892   4.000   4.878   8.489       4 
# Five-number summary plus mean of ln_added_sum within each group.
with(df, tapply(ln_added_sum, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   0.000   1.792   1.862   3.091   8.511 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   0.000   1.312   2.788   7.945       4 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   1.609   1.814   3.091   8.027       4 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   1.792   1.883   3.178   8.484       4 
library(vtree)
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
vtree version 5.6.5 -- For more information, type: vignette("vtree")
# Variable tree of `df` split by group.
vtree(df, "group")
# Variable tree split by phase and then group, with one fill colour per
# variable and horiz = FALSE.
vtree(
  df,
  c("phase", "group"),
  fillcolor = c(phase = "#e7d4e8", group = "#99d8c9"),
  horiz = FALSE
)
# Full model for ln_total: group (reference level "3") plus the four
# questionnaire items and count as covariates.
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod5 <- lm(
  ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod5)

Call:
lm(formula = ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + 
    Q10 + count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6309 -0.2310  0.3346  0.7764  1.9667 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     4.82832    0.22926  21.060  < 2e-16 ***
factor(group)0 -0.98353    0.15769  -6.237 8.33e-10 ***
factor(group)1 -0.42360    0.15572  -2.720 0.006709 ** 
factor(group)2 -0.59841    0.15354  -3.897 0.000108 ***
Q7_Q7_1        -0.19585    0.04534  -4.319 1.83e-05 ***
Q7_Q7_2         0.19627    0.04612   4.256 2.41e-05 ***
Q8_Q8_1        -0.10504    0.04771  -2.202 0.028060 *  
Q10             0.17920    0.07006   2.558 0.010776 *  
count           0.12749    0.01844   6.914 1.19e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.347 on 611 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1768,    Adjusted R-squared:  0.166 
F-statistic:  16.4 on 8 and 611 DF,  p-value: < 2.2e-16
# Reduced model for ln_total: covariates only, no group term. The relevel has
# no effect on this fit; it only keeps the reference level of df$group at "3".
df[["group"]] <- relevel(df[["group"]], ref = "3")
mod6 <- lm(
  ln_total ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod6)

Call:
lm(formula = ln_total ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count, 
    data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5737 -0.1258  0.3665  0.7666  1.7353 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.19765    0.20821  20.160  < 2e-16 ***
Q7_Q7_1     -0.18970    0.04634  -4.093 4.82e-05 ***
Q7_Q7_2      0.19885    0.04708   4.224 2.77e-05 ***
Q8_Q8_1     -0.07884    0.04894  -1.611   0.1077    
Q10          0.17509    0.07073   2.475   0.0136 *  
count        0.13321    0.01896   7.025 5.71e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.387 on 614 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1226,    Adjusted R-squared:  0.1154 
F-statistic: 17.16 on 5 and 614 DF,  p-value: 6.62e-16
# F-test of the nested pair: does adding group (mod5) improve on the
# covariates-only model (mod6) for ln_total?
stats::anova(mod5, mod6)
Analysis of Variance Table

Model 1: ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count
Model 2: ln_total ~ Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count
  Res.Df  RSS Df Sum of Sq      F    Pr(>F)    
1    611 1109                                  
2    614 1182 -3   -73.013 13.409 1.744e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Interaction plots: mean response per group (x axis), one trace per phase.
# The y axis runs from 0 to the largest observed response. na.rm = TRUE keeps
# that upper limit finite if the response ever contains missing values;
# without it max() would return NA and the ylim would be invalid.
with(df, interaction.plot(group, phase, ln_total,
                          ylim = c(0, max(ln_total, na.rm = TRUE)))) # interaction plot

with(df, interaction.plot(group, phase, ln_novelty,
                          ylim = c(0, max(ln_novelty, na.rm = TRUE)))) # interaction plot

with(df, interaction.plot(group, phase, ln_exploration,
                          ylim = c(0, max(ln_exploration, na.rm = TRUE)))) # interaction plot

# NOTE(review): this repeats the ln_novelty plot above -- confirm whether a
# different response variable was intended here.
with(df, interaction.plot(group, phase, ln_novelty,
                          ylim = c(0, max(ln_novelty, na.rm = TRUE)))) # interaction plot

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmRmIDwtIHJlYWQuY3N2KCJtZXJnZWQtbmV3LXZlcnNpb24yLmNzdiIsIGhlYWRlciA9VFJVRSwgc2VwPSIsIikKI2RmIDwtIGRmWyFjb21wbGV0ZS5jYXNlcyhkZiksIF0gIApkZgpgYGAKYGBge3J9CmRmIDwtIHJlYWQuY3N2KCJtZXJnZWQtdmFyaWV0eS5jc3YiLCBoZWFkZXIgPVRSVUUsIHNlcD0iLCIpCiNkZiA8LSBkZlshY29tcGxldGUuY2FzZXMoZGYpLCBdICAKZGYKYGBgCmBgYHtyfQpkZiA8LSByZWFkLmNzdigibWVyZ2VkLWFkZGVkLWZ1bmN0aW9ucy5jc3YiLCBoZWFkZXIgPVRSVUUsIHNlcD0iLCIpCiNkZiA8LSBkZlshY29tcGxldGUuY2FzZXMoZGYpLCBdICAKZGYKYGBgCgoKYGBge3J9CmRmJGxuX25vdmVsdHkgPC0gbG9nKGRmJG5vdmVsdHkrMSkKZGYkbG5fdG90YWwgPC0gbG9nKGRmJHRvdGFsKzEpIApkZiRsbl9leHBsb3JhdGlvbiA8LSBsb2coZGYkZXhwbG9yYXRpb24rMSkgCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpkZiRsbl9sZW5fdW5pcXVlIDwtIGxvZyhkZiRsZW5fdW5pcXVlKzEpIApkZiRsbl9hZGRlZF9zdW0gPC0gbG9nKGRmJGFkZGVkX3N1bSsxKQpkZgpgYGAKYGBge3J9CmRmX25ldyA8LSBkZlssIHNhcHBseShkZiwgaXMubnVtZXJpYyldCmNvcihkZl9uZXcsIHVzZSA9ICJjb21wbGV0ZS5vYnMiLCBtZXRob2QgPSAic3BlYXJtYW4iICkKYGBgCgoKYGBge3J9CmxpYnJhcnkoY2FyKQptb2QgPC0gbG0obG5fdG90YWx+IGxuX25vdmVsdHkgKyBsbl9sZW5fdW5pcXVlLCBkYXRhPWRmKQp2aWYobW9kKQpgYGAKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl9hZGRlZF9zdW0gfiBmYWN0b3IoZ3JvdXApLCBkYXRhPWRmKQpzdW1tYXJ5KG1vZCkKYGBgCgoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kIDwtIGxtKGxuX2V4cGxvcmF0aW9uIH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYApgYGB7cn0KZGYkZ3JvdXAgPC0gcmVsZXZlbChkZiRncm91cCwgcmVmID0gIjMiKQptb2QgPC0gbG0obG5fbGVuX3VuaXF1ZSB+IGZhY3Rvcihncm91cCksIGRhdGE9ZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQp0YXBwbHkoZGYkbG5fbGVuX3VuaXF1ZSwgZGYkZ3JvdXAsIHN1bW1hcnkpCmBgYAoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kIDwtIGxtKGxuX3RvdGFsIH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kIDwtIGxtKGxuX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApLCBkYXRhPWRmKQpzdW1tYXJ5KG1vZCkKYGBgCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIp
Cm1vZDIgPC0gbG0obG5fZXhwbG9yYXRpb24gfiBmYWN0b3IoZ3JvdXApICsgUTdfUTdfMSArIFE3X1E3XzIgKyBROF9ROF8xICsgUTEwICsgY291bnQsIGRhdGE9ZGYpCnN1bW1hcnkobW9kMikKYGBgCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZDMgPC0gbG0obG5fZXhwbG9yYXRpb24gfiAgUTdfUTdfMSArIFE3X1E3XzIgKyBROF9ROF8xICsgUTEwICsgY291bnQsIGRhdGE9ZGYpCnN1bW1hcnkobW9kMykKYGBgCmBgYHtyfQphbm92YShtb2QyLCBtb2QzKQpgYGAKCgpgYGB7cn0KZGYkZ3JvdXAgPC0gcmVsZXZlbChkZiRncm91cCwgcmVmID0gIjMiKQptb2QgPC0gbG0obG5fbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgKyBjb3VudCwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kMSA8LSBsbShsbl9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCArIGNvdW50LCBkYXRhPWRmKQpzdW1tYXJ5KG1vZDEpCmBgYApgYGB7cn0KZGYkZ3JvdXAgPC0gcmVsZXZlbChkZiRncm91cCwgcmVmID0gIjMiKQptb2Q0IDwtIGxtKGxuX25vdmVsdHkgfiBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgKyBjb3VudCwgZGF0YT1kZikKc3VtbWFyeShtb2Q0KQpgYGAKCgoKYGBge3J9CmFub3ZhKG1vZDEsIG1vZDQpCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KGxtZXJUZXN0KQpmaXQubG1lciA8LSBsbWVyKGxuX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgKCAxIHwgcGhhc2UpLCBkYXRhID0gZGYsIFJFTUw9IEZBTFNFKQpmaXQubG1lcgpgYGAgCgpgYGB7cn0KdGFwcGx5KGRmJGxuX25vdmVsdHksIGRmJGdyb3VwLCBzdW1tYXJ5KQpgYGAKCmBgYHtyfQp0YXBwbHkoZGYkbG5fdG90YWwsIGRmJGdyb3VwLCBzdW1tYXJ5KQpgYGAKYGBge3J9CnRhcHBseShkZiRsbl9leHBsb3JhdGlvbiwgZGYkZ3JvdXAsIHN1bW1hcnkpCmBgYAoKYGBge3J9CnRhcHBseShkZiRsbl9sZW5fdW5pcXVlLCBkZiRncm91cCwgc3VtbWFyeSkKYGBgCmBgYHtyfQp0YXBwbHkoZGYkbG5fYWRkZWRfc3VtLCBkZiRncm91cCwgc3VtbWFyeSkKYGBgCgoKYGBge3J9CmxpYnJhcnkodnRyZWUpCnZ0cmVlKGRmLCAiZ3JvdXAiKQpgYGAKYGBge3J9CnZ0cmVlKGRmLCBjKCJwaGFzZSIsICJncm91cCIpLCAKICAgZmlsbGNvbG9yID0gYyggcGhhc2UgPSAiI2U3ZDRlOCIsIGdyb3VwID0gIiM5OWQ4YzkiKSwKICAgaG9yaXogPSBGQUxTRSkKYGBgCgoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kNSA8LSBsbShsbl90b3RhbCB+IGZhY3Rvcihncm91cCkgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgKyBjb3VudCwgZGF0YT1kZikKc3VtbWFyeShtb2Q1KQpgYGAK
CmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZDYgPC0gbG0obG5fdG90YWwgfiBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgKyBjb3VudCwgZGF0YT1kZikKc3VtbWFyeShtb2Q2KQpgYGAKCmBgYHtyfQphbm92YShtb2Q1LCBtb2Q2KQpgYGAKCgoKYGBge3J9CndpdGgoZGYsIGludGVyYWN0aW9uLnBsb3QoZ3JvdXAsIHBoYXNlLCBsbl90b3RhbCwgeWxpbT1jKDAsIG1heChsbl90b3RhbCkpKSkgIyBpbnRlcmFjdGlvbiBwbG90CmBgYAoKYGBge3J9CndpdGgoZGYsIGludGVyYWN0aW9uLnBsb3QoZ3JvdXAsIHBoYXNlLCBsbl9ub3ZlbHR5LCB5bGltPWMoMCwgbWF4KGxuX25vdmVsdHkpKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAKCmBgYHtyfQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBwaGFzZSwgbG5fZXhwbG9yYXRpb24sIHlsaW09YygwLCBtYXgobG5fZXhwbG9yYXRpb24pKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAKCgoKYGBge3J9CndpdGgoZGYsIGludGVyYWN0aW9uLnBsb3QoZ3JvdXAsIHBoYXNlLCBsbl9ub3ZlbHR5LCB5bGltPWMoMCwgbWF4KGxuX25vdmVsdHkpKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAKCg==